1. Regression

2. Exploratory Data Analysis

Code

Example 2.1 Linear trend

summary(fit <- lm(chicken~time(chicken))) # regress price on time
## 
## Call:
## lm(formula = chicken ~ time(chicken))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7411 -3.4730  0.8251  2.7738 11.5804 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -7.131e+03  1.624e+02  -43.91   <2e-16 ***
## time(chicken)  3.592e+00  8.084e-02   44.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.696 on 178 degrees of freedom
## Multiple R-squared:  0.9173, Adjusted R-squared:  0.9168 
## F-statistic:  1974 on 1 and 178 DF,  p-value: < 2.2e-16
tsplot(chicken, ylab="cents per pound", col=4, lwd=2)
abline(fit)           # add the fitted regression line to the plot          

Example 2.2 LA pollution, temperature and mortality

par(mfrow=c(3,1))
tsplot(cmort, main="Cardiovascular Mortality", ylab="")
tsplot(tempr, main="Temperature",  ylab="")
tsplot(part, main="Particulates", ylab="")

pairs(cbind(Mortality=cmort, Temperature=tempr, Particulates=part))

## Regression
temp  = tempr-mean(tempr)  # center temperature    
temp2 = temp^2             # square it  
trend = time(cmort)        # time

fit = lm(cmort~ trend + temp + temp2 + part, na.action=NULL)

summary(fit)       # regression results
## 
## Call:
## lm(formula = cmort ~ trend + temp + temp2 + part, na.action = NULL)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.0760  -4.2153  -0.4878   3.7435  29.2448 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.831e+03  1.996e+02   14.19  < 2e-16 ***
## trend       -1.396e+00  1.010e-01  -13.82  < 2e-16 ***
## temp        -4.725e-01  3.162e-02  -14.94  < 2e-16 ***
## temp2        2.259e-02  2.827e-03    7.99 9.26e-15 ***
## part         2.554e-01  1.886e-02   13.54  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.385 on 503 degrees of freedom
## Multiple R-squared:  0.5954, Adjusted R-squared:  0.5922 
## F-statistic:   185 on 4 and 503 DF,  p-value: < 2.2e-16
summary(aov(fit))  # ANOVA table   (compare to next line)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## trend         1  10667   10667  261.62 <2e-16 ***
## temp          1   8607    8607  211.09 <2e-16 ***
## temp2         1   3429    3429   84.09 <2e-16 ***
## part          1   7476    7476  183.36 <2e-16 ***
## Residuals   503  20508      41                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(lm(cmort~cbind(trend, temp, temp2, part)))) # Table 2.1
##                                  Df Sum Sq Mean Sq F value Pr(>F)    
## cbind(trend, temp, temp2, part)   4  30178    7545     185 <2e-16 ***
## Residuals                       503  20508      41                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
num = length(cmort)                                     # sample size
AIC(fit)/num - log(2*pi)                                # AIC 
## [1] 4.721732
BIC(fit)/num - log(2*pi)                                # BIC 
## [1] 4.771699
# AIC(fit, k=log(num))/num - log(2*pi)                  # BIC (alt method) 
# (AICc = log(sum(resid(fit)^2)/num) + (num+5)/(num-5-2)) # AICc

Examples 2.3 Regression with lagged variables

fish = ts.intersect(rec, soiL6=lag(soi,-6), dframe=TRUE)   
summary(fit <- lm(rec~soiL6, data=fish, na.action=NULL))
## 
## Call:
## lm(formula = rec ~ soiL6, data = fish, na.action = NULL)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -65.187 -18.234   0.354  16.580  55.790 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   65.790      1.088   60.47   <2e-16 ***
## soiL6        -44.283      2.781  -15.92   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.5 on 445 degrees of freedom
## Multiple R-squared:  0.3629, Adjusted R-squared:  0.3615 
## F-statistic: 253.5 on 1 and 445 DF,  p-value: < 2.2e-16
tsplot(fish$rec, ylim=c(0,111))  # plot the data and the fitted values (not shown in text) 
lines(fitted(fit), col=2)

Examples 2.4 and 2.5 Differencing Chicken Prices

fit = lm(chicken~time(chicken), na.action=NULL) # regress chicken on time
par(mfrow=c(2,1))
tsplot(resid(fit), main="detrended")
tsplot(diff(chicken), main="first difference")

par(mfrow=c(3,1))     # plot ACFs
acf(chicken, 48, main="chicken")
acf(resid(fit), 48, main="detrended")
acf(diff(chicken), 48, main="first difference")

Example 2.6 Differencing Global Temperature

par(mfrow=c(3,1))
tsplot(globtemp, type="o")
tsplot(diff(globtemp), type="o")
mean(diff(globtemp))     # drift estimate = .008
## [1] 0.007925926
acf(diff(gtemp), 48, main="")

Example 2.7 Paleoclimatic Glacial Varves

par(mfrow=c(2,1))
tsplot(varve, main="varve", ylab="")
tsplot(log(varve), main="log(varve)", ylab="" )

Example 2.8

lag1.plot(soi, 12)

lag2.plot(soi, rec, 8)

Example 2.9

dummy = ifelse(soi<0, 0, 1)
fish  = ts.intersect(rec, soiL6=lag(soi,-6), dL6=lag(dummy,-6), dframe=TRUE)
summary(fit <- lm(rec~ soiL6*dL6, data=fish, na.action=NULL))
## 
## Call:
## lm(formula = rec ~ soiL6 * dL6, data = fish, na.action = NULL)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -63.291 -15.821   2.224  15.791  61.788 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   74.479      2.865  25.998  < 2e-16 ***
## soiL6        -15.358      7.401  -2.075   0.0386 *  
## dL6           -1.139      3.711  -0.307   0.7590    
## soiL6:dL6    -51.244      9.523  -5.381  1.2e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.84 on 443 degrees of freedom
## Multiple R-squared:  0.4024, Adjusted R-squared:  0.3984 
## F-statistic: 99.43 on 3 and 443 DF,  p-value: < 2.2e-16
attach(fish)
## The following object is masked from package:astsa:
## 
##     rec
plot(soiL6, rec)
lines(lowess(soiL6, rec), col=4, lwd=2)
points(soiL6, fitted(fit), pch='+', col=2)

tsplot(resid(fit)) # not shown ...

acf(resid(fit))   # ... but obviously not noise

Example 2.10 Using Regression to Discover a Signal in Noise

set.seed(1000)  # so you can reproduce these results
x = 2*cos(2*pi*1:500/50 + .6*pi) + rnorm(500,0,5)
z1 = cos(2*pi*1:500/50)  
z2 = sin(2*pi*1:500/50)
summary(fit <- lm(x~0+z1+z2))  # zero to exclude the intercept
## 
## Call:
## lm(formula = x ~ 0 + z1 + z2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.6589  -3.0872   0.1508   3.1302  13.5821 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## z1  -0.7083     0.2958  -2.394    0.017 *  
## z2  -2.5473     0.2958  -8.611   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.678 on 498 degrees of freedom
## Multiple R-squared:  0.1382, Adjusted R-squared:  0.1348 
## F-statistic: 39.94 on 2 and 498 DF,  p-value: < 2.2e-16
par(mfrow=c(2,1))
tsplot(x)
tsplot(x, col=8, ylab=expression(hat(x)))
lines(fitted(fit), col=2)

Example 2.11 Moving Average Smoother

wgts = c(.5, rep(1,11), .5)/12
soif = filter(soi, sides=2, filter=wgts)
tsplot(soi)
lines(soif, lwd=2, col=4)
par(fig = c(.75, 1, .75, 1), new = TRUE) # the insert
nwgts = c(rep(0,20), wgts, rep(0,20))
plot(nwgts, type="l", ylim = c(-.02,.1), xaxt='n', yaxt='n', ann=FALSE)

Example 2.12 Kernel Smoothing

tsplot(soi)
lines(ksmooth(time(soi), soi, "normal", bandwidth=1), lwd=2, col=4)
par(fig = c(.75, 1, .75, 1), new = TRUE) # the insert
gauss = function(x) { 1/sqrt(2*pi) * exp(-(x^2)/2) }
x = seq(from = -3, to = 3, by = 0.001)
plot(x, gauss(x), type ="l", ylim=c(-.02,.45), xaxt='n', yaxt='n', ann=FALSE)

Example 2.13

tsplot(soi)
lines(lowess(soi, f=.05), lwd=2, col=4) # El Nino cycle
lines(lowess(soi), lty=2, lwd=2, col=2) # trend (with default span)

Example 2.14

tsplot(soi)
lines(smooth.spline(time(soi), soi, spar=.5), lwd=2, col=4)
lines(smooth.spline(time(soi), soi, spar= 1), lty=2, lwd=2, col=2)

Example 2.15

plot(tempr, cmort, xlab="Temperature", ylab="Mortality")
lines(lowess(tempr, cmort))